library(haven)

# Read the 2020 Native American IAT public release (SPSS .sav) with haven
impdat20_natam <- read_spss("./data//Native_American_IAT.public.2020.sav")
# Subset to USA: keep rows whose countryres_num equals 1. which() discards
# rows where the comparison is NA, so respondents with a missing country
# code are dropped as well.
impdat20_natam <- impdat20_natam[which(impdat20_natam$countryres_num == 1), ]

# Meta Info ---------------------------------------------------------------
# Session date as a Date: render as "YYYY-MM-DD" text, then parse back
impdat20_natam$date_mdy <- as.Date(format(impdat20_natam$date, "%Y-%m-%d"))
# Week of the year as a two-digit string ("%W": weeks start on Monday)
impdat20_natam$week <- format(as.Date(impdat20_natam$date), "%W")

# Demographics ---------------------------------------------------------------
# Age
# Guarantee the target column exists so the partial assignment below always
# produces one value per row.
# NOTE(review): if the raw file already ships an `age` column it is left
# untouched here and only overwritten where birthyear > 1900 -- confirm.
if (!"age" %in% names(impdat20_natam)) {
  impdat20_natam$age <- NA_real_
}
# age = year - birthyear, filled in only for rows with birthyear > 1900;
# every other row keeps its current `age` value.
impdat20_natam$age[which(impdat20_natam$birthyear > 1900)] <- with(
  impdat20_natam,
  {
    valid <- which(birthyear > 1900)
    year[valid] - birthyear[valid]
  }
)
# Min-max rescaling of age to the [0, 1] interval (missing ages ignored when
# finding the minimum and maximum)
impdat20_natam$age_sc <- with(
  impdat20_natam,
  (age - min(age, na.rm = TRUE)) /
    (max(age, na.rm = TRUE) - min(age, na.rm = TRUE))
)

# Four age brackets; ages below 18 and missing ages stay NA
impdat20_natam$age_cat4 <- NA
impdat20_natam$age_cat4[
  which(impdat20_natam$age >= 18 & impdat20_natam$age <= 29)
] <- "18-29"
impdat20_natam$age_cat4[
  which(impdat20_natam$age >= 30 & impdat20_natam$age <= 44)
] <- "30-44"
impdat20_natam$age_cat4[
  which(impdat20_natam$age >= 45 & impdat20_natam$age <= 64)
] <- "45-64"
impdat20_natam$age_cat4[which(impdat20_natam$age >= 65)] <- "65+"
impdat20_natam$age_cat4 <- as.factor(impdat20_natam$age_cat4)

# Sex
# Recode birthSex: 2 -> "f", 1 -> "m"; any other value (or missing) -> NA
impdat20_natam$sex <- ifelse(
  impdat20_natam$birthSex == 2, "f",
  ifelse(impdat20_natam$birthSex == 1, "m", NA)
)

# Race and Ethnicity
# Label the numeric raceomb_002 codes. The level set is fixed at 1..8 so it
# always lines up with the eight labels, regardless of which codes happen to
# occur in the data (deriving it from max() makes factor() fail whenever the
# largest observed code is not exactly 8). Codes outside 1..8 become NA.
impdat20_natam$race <- factor(impdat20_natam$raceomb_002,
                              levels = 1:8,
                              labels = c("American Indian/Alaskan Native",
                                         "East Asian",
                                         "South Asian",
                                         "Native Hawaiian/Pacific Islander",
                                         "Black",
                                         "White",
                                         "Other",
                                         "Multiracial"))

# Hispanic indicator from ethnicityomb: 1 -> 1, 2 -> 0; every other value
# (unknowns, missing) is dropped to NA
impdat20_natam$hisp <- ifelse(
  impdat20_natam$ethnicityomb == 1, 1,
  ifelse(impdat20_natam$ethnicityomb == 2, 0, NA)
)

# Five-category race/ethnicity. Non-Hispanic respondents (hisp == 0) are
# grouped by race; anyone with hisp == 1 is "Hispanic" regardless of race.
# Respondents with missing hisp stay NA.
# NOTE(review): non-Hispanic "American Indian/Alaskan Native" respondents match
# none of the rules below and therefore stay NA -- confirm this is intended.
impdat20_natam$race5 <- NA
impdat20_natam$race5[which(impdat20_natam$race %in% c("East Asian", "South Asian", "Native Hawaiian/Pacific Islander") & impdat20_natam$hisp == 0)] <- "Asian/Pacific Islander"
impdat20_natam$race5[which(impdat20_natam$race == "Black" & impdat20_natam$hisp == 0)] <- "Black"
impdat20_natam$race5[which(impdat20_natam$race == "White" & impdat20_natam$hisp == 0)] <- "White"
impdat20_natam$race5[which(impdat20_natam$race %in% c("Other", "Multiracial") & impdat20_natam$hisp == 0)] <- "Other"
impdat20_natam$race5[which(impdat20_natam$hisp == 1)] <- "Hispanic"
# Name the levels explicitly instead of permuting the alphabetical levels by
# position: the order is the same when all five groups are present, and it no
# longer breaks when one of them is absent from the data.
impdat20_natam$race5 <- factor(impdat20_natam$race5,
                               levels = c("Asian/Pacific Islander",
                                          "Black",
                                          "Hispanic",
                                          "White",
                                          "Other"))

# Three-category race: "White" and "Black" carry over from race5, every other
# non-missing race5 value becomes "Other", and missing race5 stays NA
impdat20_natam$race3 <- ifelse(
  impdat20_natam$race5 %in% c("White", "Black"),
  as.character(impdat20_natam$race5),
  ifelse(is.na(impdat20_natam$race5), NA, "Other")
)

# Ideology
# Three-way ideology from the 7-point politicalid_7 item:
#   politicalid_7 <  4 ->  1 (labelled "conservative" below)
#   politicalid_7 == 4 ->  0 (labelled "moderate")
#   politicalid_7 >  4 -> -1 (labelled "liberal")
# Missing politicalid_7 stays NA.
impdat20_natam$ideo3 <- NA
impdat20_natam$ideo3[which(impdat20_natam$politicalid_7 < 4)] <- 1
impdat20_natam$ideo3[which(impdat20_natam$politicalid_7 == 4)] <- 0
impdat20_natam$ideo3[which(impdat20_natam$politicalid_7 > 4)] <- -1
# Explicit levels pin each code to its label; relying on the observed values
# alone fails (or mislabels) when one of the three groups is absent.
impdat20_natam$ideo3_lab <- factor(impdat20_natam$ideo3,
                                   levels = c(-1, 0, 1),
                                   labels = c("liberal", "moderate", "conservative"))

# 0/1 dummies for each ideology group (NA where ideo3 is NA)
impdat20_natam$ideo_lib <- ifelse(impdat20_natam$ideo3 == -1, 1, 0)
impdat20_natam$ideo_mod <- ifelse(impdat20_natam$ideo3 == 0, 1, 0)
impdat20_natam$ideo_con <- ifelse(impdat20_natam$ideo3 == 1, 1, 0)

# Reverse-code the 7-point item (7 - politicalid_7), so higher values point in
# the same direction as ideo3 == 1, then divide by 6.
# NOTE(review): the result spans 0-1 only if politicalid_7 runs 1..7 -- confirm.
impdat20_natam$ideo7 <- impdat20_natam$politicalid_7 * -1 + 7
impdat20_natam$ideo7_sc <- impdat20_natam$ideo7 / 6

# Region
# south: 1 if STATE is one of the eleven states listed, 0 for any other
# non-missing STATE value, NA when STATE is missing.
impdat20_natam$south <- ifelse(
  impdat20_natam$STATE %in% c("VA", "NC", "SC", "GA", "FL", "MS",
                              "AL", "TN", "AR", "LA", "TX"),
  1, 0
)
# Treat both NA and the empty string as "state unknown": %in% returns FALSE
# for NA, which would otherwise code a missing state as non-South (0).
impdat20_natam$south[is.na(impdat20_natam$STATE) | impdat20_natam$STATE == ""] <- NA

# Four-way region (South / West / Midwest / Northeast) from the STATE
# abbreviation. The four state lists do not overlap, so the order of the
# nested tests is irrelevant. Any STATE outside the lists -- including the
# empty string -- is left NA. The result is stored as a factor.
impdat20_natam$census_region <- as.factor(with(
  impdat20_natam,
  ifelse(
    STATE %in% c("DE", "MD", "DC", "WV", "KY", "OK",
                 "VA", "NC", "SC", "GA", "FL", "MS", "AL",
                 "TN", "AR", "LA", "TX"),
    "South",
    ifelse(
      STATE %in% c("CA", "OR", "WA", "MT", "WY",
                   "ID", "CO", "UT", "AZ", "NM",
                   "NV", "AK", "HI"),
      "West",
      ifelse(
        STATE %in% c("ND", "SD", "MN", "WI",
                     "IA", "NE", "KS", "MO",
                     "IL", "IN", "MI", "OH"),
        "Midwest",
        ifelse(
          STATE %in% c("ME", "VT", "NH", "MA", "RI",
                       "CT", "NY", "NJ", "PA"),
          "Northeast",
          NA
        )
      )
    )
  )
))

# Education
# Collapse the raw `edu` code into five levels. The conditions are mutually
# exclusive, so the nesting order does not matter; missing edu stays NA.
impdat20_natam$edu_cat <- with(
  impdat20_natam,
  ifelse(edu <= 3, 1,                      # Less than HS
    ifelse(edu == 4, 2,                    # HS
      ifelse(edu == 5 | edu == 6, 3,       # Some college or associates
        ifelse(edu == 7, 4,                # College
          ifelse(edu >= 8, 5, NA)))))      # Adv degree or schooling
)
# College indicator: 1 for edu_cat 4 or 5, 0 below that, NA if edu_cat is NA
impdat20_natam$col <- ifelse(impdat20_natam$edu_cat >= 4, 1, 0)

# Four-level labelled version: edu_cat 1 and 2 share "HS or Less", and codes
# 3, 4, 5 map to the remaining labels. Looked up by position, so the first
# vector has one entry per edu_cat code.
impdat20_natam$edu_cat4 <- factor(
  c("HS or Less", "HS or Less", "Some College", "College", "Post-Grad")[
    as.integer(impdat20_natam$edu_cat)
  ],
  levels = c("HS or Less",
             "Some College",
             "College",
             "Post-Grad")
)

# Reason for Visiting
# impdat20_natam$broughtwebsite
# Five-category recode of the raw broughtwebsite answer. The answer sets are
# disjoint, so the nesting order is irrelevant; any answer not listed
# (including missing) stays NA. Stored as a factor.
impdat20_natam$broughtwebsite2 <- as.factor(with(
  impdat20_natam,
  ifelse(
    broughtwebsite %in% c("Assignment for school", "Assignment for work"),
    "Assignment school/work",
    ifelse(
      broughtwebsite %in% c("Mention in a news story (any medium)"),
      "News Mention",
      ifelse(
        broughtwebsite %in% c("My Internet search for this topic or a related topic",
                              "Mention or link at a non-news Internet site"),
        "Internet Mention/Search",
        ifelse(
          broughtwebsite %in% "Recommendation of a friend or co-worker",
          "Peer Recommendation",
          ifelse(broughtwebsite %in% c("Other", "null"), "Other/null", NA)
        )
      )
    )
  )
))


# Four-category recode of the raw broughtwebsite answer: news and internet
# sources are merged into one group. Answers not listed below stay NA.
impdat20_natam$broughtwebsite4 <- NA
impdat20_natam$broughtwebsite4[impdat20_natam$broughtwebsite %in% c("Assignment for school", "Assignment for work")] <- "Assignment school/work"
# The news answer must be matched by its raw text. "News Mention" is only the
# recoded label used in broughtwebsite2, so matching on it left every
# news-story respondent as NA.
impdat20_natam$broughtwebsite4[impdat20_natam$broughtwebsite %in% c("Mention in a news story (any medium)",
                                                                    "My Internet search for this topic or a related topic",
                                                                    "Mention or link at a non-news Internet site")] <- "News/Internet Mention/Search"
impdat20_natam$broughtwebsite4[impdat20_natam$broughtwebsite %in% "Recommendation of a friend or co-worker"] <- "Peer Recommendation"
impdat20_natam$broughtwebsite4[impdat20_natam$broughtwebsite %in% c("Other", "null")] <- "Other/null"

impdat20_natam$broughtwebsite4 <- as.factor(impdat20_natam$broughtwebsite4)

# Interview Date Indicators -----------------------------------------------
# 1/0 flags locating the session date (date_mdy) relative to 2020-05-25.
# NOTE(review): "gf" presumably refers to George Floyd's death -- confirm.
# post_gf: any session after 2020-05-25
impdat20_natam$post_gf <- ifelse(
  impdat20_natam$date_mdy > as.Date("2020-05-25"), 1, 0
)
# Consecutive, non-overlapping windows after the cutoff; each window excludes
# its start date and includes its end date
impdat20_natam$post_gf_1month <- with(
  impdat20_natam,
  ifelse(date_mdy > as.Date("2020-05-25") & date_mdy <= as.Date("2020-06-26"),
         1, 0)
)
impdat20_natam$post_gf_2month <- with(
  impdat20_natam,
  ifelse(date_mdy > as.Date("2020-06-26") & date_mdy <= as.Date("2020-07-26"),
         1, 0)
)
impdat20_natam$post_gf_3month <- with(
  impdat20_natam,
  ifelse(date_mdy > as.Date("2020-07-26") & date_mdy <= as.Date("2020-08-26"),
         1, 0)
)
